In [1]:
import sys
sys.path.append('..')
from data.model.metrics import TRAINING_METRICS, VALIDATION_METRICS, FEATURES_IMPORTANCES
from data.labeled.preprocessed import  RISKS_MAPPING as risks
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

Features selected by the final model, with their importances for each risk

In [2]:
# For every risk, draw a horizontal bar chart of the feature importances
# stored in FEATURES_IMPORTANCES. Sorting ascending places the largest
# importance at the top of the horizontal bar chart.
for risk, importance in FEATURES_IMPORTANCES.items():
    print(f"Risk: {risks[risk]}")
    # Explicit figure/axes instead of the implicit pyplot "current figure",
    # so the bars, title and label are guaranteed to land on the same axes.
    fig, ax = plt.subplots(figsize=(5, 5))
    importance.sort_values(ascending=True).plot.barh(ax=ax)
    # Label the figure so it can be understood without the printed line above.
    ax.set_title(risks[risk])
    ax.set_xlabel("Importance")
    plt.show()
Risk: Higher water prices
Risk: Inadequate or aging infrastructure
Risk: Increased water stress or scarcity
Risk: Declining water quality
Risk: Increased water demand
Risk: Regulatory
Risk: Energy supply issues

The following metrics were computed separately on the training set and on a validation set

Training Metrics

In [3]:
# For every risk, show the training-set confusion matrix followed by the
# regression and classification reports stored in TRAINING_METRICS.
for risk, metrics in TRAINING_METRICS.items():
    print('Risk:', risks[risk])
    print("Confusion Matrix:")
    fig, ax = plt.subplots()
    # fmt='g' writes each cell value in full. seaborn's default ('.2g')
    # abbreviates values with three or more digits (e.g. 133 -> 1.3e+02).
    # NOTE(review): assumes the matrix holds raw counts -- confirm.
    sns.heatmap(metrics['confusion_matrix'], annot=True, fmt='g', ax=ax)
    # Title the heatmap so it is identifiable without the printed header.
    ax.set_title(f"{risks[risk]} (training)")
    plt.show()
    print("Regression Report")
    # Wrapping the report in a list renders it as a single-row table.
    display(pd.DataFrame([metrics['regression_report']]))
    print("Classification Report")
    display(pd.DataFrame(metrics['classification_report']))
    
Risk: Higher water prices
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.681154 0.5817 0.383081
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 1.000000 0.163265 0.0 0.0 0.316667 0.290816 0.555102
recall 0.343750 1.000000 0.0 0.0 0.316667 0.335938 0.316667
f1-score 0.511628 0.280702 0.0 0.0 0.316667 0.198082 0.310295
support 32.000000 8.000000 18.0 2.0 0.316667 60.000000 60.000000
Risk: Inadequate or aging infrastructure
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.714576 0.639898 0.18765
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.0 0.025000 0.761905 0.0 0.475728 0.196726 0.526410
recall 0.0 0.200000 0.676056 0.0 0.475728 0.219014 0.475728
f1-score 0.0 0.044444 0.716418 0.0 0.475728 0.190216 0.495999
support 24.0 5.000000 71.000000 3.0 0.475728 103.000000 103.000000
Risk: Increased water stress or scarcity
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.307323 0.249674 0.167166
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.0 0.666667 0.743017 0.0 0.741758 0.352421 0.605245
recall 0.0 0.117647 1.000000 0.0 0.741758 0.279412 0.741758
f1-score 0.0 0.200000 0.852564 0.0 0.741758 0.263141 0.641709
support 2.0 17.000000 133.000000 30.0 0.741758 182.000000 182.000000
Risk: Declining water quality
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.667384 0.671731 0.272835
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.0 0.137931 0.606061 0.0 0.5 0.185998 0.342868
recall 0.0 0.266667 0.869565 0.0 0.5 0.284058 0.500000
f1-score 0.0 0.181818 0.714286 0.0 0.5 0.224026 0.406351
support 26.0 15.000000 69.000000 18.0 0.5 128.000000 128.000000
Risk: Increased water demand
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.651141 0.555782 0.477132
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 1.000000 0.225 0.666667 0.0 0.485294 0.472917 0.689951
recall 0.571429 0.900 0.320000 0.0 0.485294 0.447857 0.485294
f1-score 0.727273 0.360 0.432432 0.0 0.485294 0.379926 0.511389
support 28.000000 10.000 25.000000 5.0 0.485294 68.000000 68.000000
Risk: Regulatory
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.396027 0.258317 0.603623
Classification Report
0.0 1.0 2.0 accuracy macro avg weighted avg
precision 1.000000 0.222222 0.0 0.688889 0.407407 0.730864
recall 0.843750 1.000000 0.0 0.688889 0.614583 0.688889
f1-score 0.915254 0.363636 0.0 0.688889 0.426297 0.683171
support 32.000000 4.000000 9.0 0.688889 45.000000 45.000000
Risk: Energy supply issues
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.463666 0.415226 0.246383
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.971429 0.0 0.0 0.0 0.829268 0.242857 0.829268
recall 0.971429 0.0 0.0 0.0 0.829268 0.242857 0.829268
f1-score 0.971429 0.0 0.0 0.0 0.829268 0.242857 0.829268
support 35.000000 1.0 4.0 1.0 0.829268 41.000000 41.000000

Validation Metrics

In [4]:
# For every risk, show the validation-set confusion matrix followed by the
# regression and classification reports stored in VALIDATION_METRICS.
# (seaborn is already imported as `sns` in the notebook's first cell, so the
# redundant re-import that used to sit here has been dropped.)
for risk, metrics in VALIDATION_METRICS.items():
    print('Risk:', risks[risk])
    print("Confusion Matrix:")
    fig, ax = plt.subplots()
    # fmt='g' writes each cell value in full. seaborn's default ('.2g')
    # abbreviates values with three or more digits (e.g. 133 -> 1.3e+02).
    # NOTE(review): assumes the matrix holds raw counts -- confirm.
    sns.heatmap(metrics['confusion_matrix'], annot=True, fmt='g', ax=ax)
    # Title the heatmap so it is identifiable without the printed header.
    ax.set_title(f"{risks[risk]} (validation)")
    plt.show()
    print("Regression Report")
    # Wrapping the report in a list renders it as a single-row table.
    display(pd.DataFrame([metrics['regression_report']]))
    print("Classification Report")
    display(pd.DataFrame(metrics['classification_report']))
Risk: Higher water prices
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.792797 0.835675 0.131611
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.500000 0.160000 0.0 0.0 0.185185 0.165000 0.282963
recall 0.071429 1.000000 0.0 0.0 0.185185 0.267857 0.185185
f1-score 0.125000 0.275862 0.0 0.0 0.185185 0.100216 0.105683
support 14.000000 4.000000 8.0 1.0 0.185185 27.000000 27.000000
Risk: Inadequate or aging infrastructure
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.779716 0.756456 -0.008272
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.0 0.052632 0.692308 0.0 0.422222 0.186235 0.480432
recall 0.0 0.333333 0.580645 0.0 0.422222 0.228495 0.422222
f1-score 0.0 0.090909 0.631579 0.0 0.422222 0.180622 0.441148
support 10.0 3.000000 31.000000 1.0 0.422222 45.000000 45.000000
Risk: Increased water stress or scarcity
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.337476 0.287249 0.04971
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.0 0.0 0.734177 0.0 0.734177 0.183544 0.539016
recall 0.0 0.0 1.000000 0.0 0.734177 0.250000 0.734177
f1-score 0.0 0.0 0.846715 0.0 0.734177 0.211679 0.621639
support 1.0 7.0 58.000000 13.0 0.734177 79.000000 79.000000
Risk: Declining water quality
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.724953 0.753705 0.158114
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.0 0.058824 0.631579 0.0 0.454545 0.172601 0.351984
recall 0.0 0.142857 0.800000 0.0 0.454545 0.235714 0.454545
f1-score 0.0 0.083333 0.705882 0.0 0.454545 0.197304 0.395633
support 11.0 7.000000 30.000000 7.0 0.454545 55.000000 55.000000
Risk: Increased water demand
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.864344 0.983377 0.082973
Classification Report
0.0 1.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.666667 0.190476 0.666667 0.0 0.333333 0.380952 0.558730
recall 0.333333 1.000000 0.166667 0.0 0.333333 0.375000 0.333333
f1-score 0.444444 0.320000 0.266667 0.0 0.333333 0.257778 0.327111
support 12.000000 4.000000 12.000000 2.0 0.333333 30.000000 30.000000
Risk: Regulatory
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.642533 0.665608 -0.022706
Classification Report
0.0 1.0 2.0 accuracy macro avg weighted avg
precision 0.750000 0.125 0.0 0.5 0.291667 0.537500
recall 0.642857 0.500 0.0 0.5 0.380952 0.500000
f1-score 0.692308 0.200 0.0 0.5 0.297436 0.504615
support 14.000000 2.000 4.0 0.5 20.000000 20.000000
Risk: Energy supply issues
Confusion Matrix:
Regression Report
MAE MSE Explained Variance
0 0.578727 0.78408 0.020631
Classification Report
0.0 2.0 3.0 accuracy macro avg weighted avg
precision 0.833333 0.0 0.0 0.833333 0.277778 0.694444
recall 1.000000 0.0 0.0 0.833333 0.333333 0.833333
f1-score 0.909091 0.0 0.0 0.833333 0.303030 0.757576
support 15.000000 2.0 1.0 0.833333 18.000000 18.000000

Training MSE per risk, with summary statistics (the mean is the average MSE)

In [5]:
# Gather the training MSE of each risk, keyed by the risk's readable label.
training_mse = pd.Series(
    dict(
        (risks[risk_id], report['regression_report']['MSE'])
        for risk_id, report in TRAINING_METRICS.items()
    )
)
# Per-risk values first; the summary statistics are the cell's output.
display(training_mse)
training_mse.describe()
Higher water prices                   0.581700
Inadequate or aging infrastructure    0.639898
Increased water stress or scarcity    0.249674
Declining water quality               0.671731
Increased water demand                0.555782
Regulatory                            0.258317
Energy supply issues                  0.415226
dtype: float64
Out[5]:
count    7.000000
mean     0.481761
std      0.175490
min      0.249674
25%      0.336772
50%      0.555782
75%      0.610799
max      0.671731
dtype: float64

Validation MSE per risk, with summary statistics (the mean is the average MSE)

In [6]:
# Gather the validation MSE of each risk, keyed by the risk's readable label.
valid_mse = pd.Series(
    dict(
        (risks[risk_id], report['regression_report']['MSE'])
        for risk_id, report in VALIDATION_METRICS.items()
    )
)
# Per-risk values first; the summary statistics are the cell's output.
display(valid_mse)
valid_mse.describe()
Higher water prices                   0.835675
Inadequate or aging infrastructure    0.756456
Increased water stress or scarcity    0.287249
Declining water quality               0.753705
Increased water demand                0.983377
Regulatory                            0.665608
Energy supply issues                  0.784080
dtype: float64
Out[6]:
count    7.000000
mean     0.723736
std      0.215814
min      0.287249
25%      0.709657
50%      0.756456
75%      0.809878
max      0.983377
dtype: float64